In [ ]:
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
from glob import glob 
import IPython.display as ipd 
from tqdm.notebook import tqdm 
import subprocess 
import speech_recognition as sr
from pydub import AudioSegment
from pydub.silence import split_on_silence
from textblob import TextBlob
import IPython.display as ipd
import os
import nltk
import seaborn as sns
from pydub import AudioSegment
from pydub.effects import normalize, low_pass_filter, high_pass_filter
from vosk import Model, KaldiRecognizer
import wave
import json

加载视频¶

In [ ]:
input_file = 'Experimenter_CREW_999_1_All_1731617801.mp4'
In [ ]:
ipd.Video(input_file,width= 700)
Out[ ]:
Your browser does not support the video element.

打开视频并读取元数据¶

In [ ]:
# 加载视频采集
cap = cv2.VideoCapture(input_file)
In [ ]:
# 帧总数 frames 
noFrames = cap.get(cv2.CAP_PROP_FRAME_COUNT)

# 视频高度和宽度
framesHeight = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # Cap prop frame height 
framesWidth = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # Cap prop frame width 

# 获取每秒帧数
framesSecond = cap.get(cv2.CAP_PROP_FPS)

print("Total number of frames in the video is",noFrames)
print("Frame Height in the video is",noFrames)
print("Frame Width in the video is",noFrames)
print("Frame per second is",framesSecond)
Total number of frames in the video is 10691.0
Frame Height in the video is 10691.0
Frame Width in the video is 10691.0
Frame per second is 29.964424428092247
In [ ]:
# 一旦完成工作,就释放视频,使 Python 不再使用视频
cap.release()

从视频中提取图像¶

In [ ]:
cap = cv2.VideoCapture(input_file)
In [ ]:
ret, img = cap.read()
print(f'Returned {ret} and img of shape {img.shape} ')
Returned True and img of shape (2160, 3840, 3) 
In [ ]:
# 绘制 opencv 图像的辅助函数
def display_cv2_img(img, figsize = (10,10)):
    img_ = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    fig, ax = plt.subplots(figsize = figsize)
    ax.imshow(img_)
    ax.axis("off")
In [ ]:
display_cv2_img(img)
No description has been provided for this image
In [ ]:
cap.release()

显示视频中的多个帧¶

In [ ]:
# 创建一个 5x5 的网格,整个图像区域为 30x20 英寸
fig, axs = plt.subplots(5, 5, figsize=(30, 20))
# 将返回的二维 axs 数组转换为一维数组,可以像访问列表一样方便地访问每个子图
axs = axs.flatten()

# 读取视频文件
cap = cv2.VideoCapture(input_file) # 打开指定路径的 input_file 视频文件
n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) # 获取视频的总帧数,并将其转换为整数。

img_idx = 0 # 初始化索引 img_idx,选择不同的子图区域进行绘制
for frame in range(n_frames): # 迭代视频的每一帧
    ret, img = cap.read() # 读取视频的当前帧, img 为该帧的图像数据
    if not ret: # 如果成功读取,ret 为 True
        break
    if frame % 100 == 0: # 每 100 帧读取一次。这样只处理视频中的一部分帧,而不是每一帧,节省内存和计算资源
        if img_idx < len(axs):  # 确保 img_idx 不超过 25
            axs[img_idx].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) # 使用 OpenCV 读取的图像默认是 BGR 格式,而 Matplotlib 显示图像时使用 RGB 格式,用 cv2.cvtColor 将图像从 BGR 转换为 RGB。然后在子图区域中显示这帧图像。
            axs[img_idx].set_title(f'Frame: {frame}') # 设置当前子图的标题,显示视频帧的索引
            axs[img_idx].axis('off') # 关闭子图的坐标轴,以便仅显示图像
            img_idx += 1 # 更新子图索引,确保下次图像显示在下一个子图中

plt.tight_layout() # 调整子图之间的间距,使得子图不会重叠
plt.show() # 显示绘制的所有图像
cap.release() # 释放视频捕获对象 cap,关闭视频文件并释放相关资源
No description has been provided for this image

提取音频(wav 格式)¶

In [ ]:
# 音频转换为 单声道(1 声道)、16kHz 采样率、线性 PCM 16-bit,符合 API 要求
audio_output = "audio.wav" # 音频输出文件路径, 输出保存为 audio.wav
#subprocess.run(["ffmpeg", "-i", input_file, "-q:a", "0", "-map", "a", audio_output])
subprocess.run( # 通过 Python 的 subprocess 模块调用外部命令(在这里是 ffmpeg 命令)。subprocess.run() 运行后会等待命令执行完成。
    [ # 传递给 ffmpeg 的命令和参数。
    "ffmpeg", # 调用 ffmpeg 工具来处理音频文件
    "-i", input_file, # 指定输入文件,input_file 是音频/视频文件的路径,ffmpeg 会从中提取音频
    "-ac", "1", # 设置音频的声道数为 1,即将音频转换为单声道(Mono), 符合 API 要求的单声道音频。 
    "-ar", "16000", # 设置音频的采样率为 16 kHz(16,000 Hz)。常用的语音采样率,符合音频处理或语音识别 API 的要求
    "-acodec", "pcm_s16le", # 指定音频编码格式为 pcm_s16le,即线性 PCM 编码,16-bit 小端格式(未压缩的音频格式,常用于高质量音频)
    audio_output # 输出文件路径,转换后的音频将保存到这个路径(即 audio.wav)
    ]
    )

print(f"音频已保存为 {audio_output}")
ffmpeg version 7.1.1 Copyright (c) 2000-2025 the FFmpeg developers
  built with Apple clang version 16.0.0 (clang-1600.0.26.6)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/7.1.1_1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-videotoolbox --enable-audiotoolbox --enable-neon
  libavutil      59. 39.100 / 59. 39.100
  libavcodec     61. 19.101 / 61. 19.101
  libavformat    61.  7.100 / 61.  7.100
  libavdevice    61.  3.100 / 61.  3.100
  libavfilter    10.  4.100 / 10.  4.100
  libswscale      8.  3.100 /  8.  3.100
  libswresample   5.  3.100 /  5.  3.100
  libpostproc    58.  3.100 / 58.  3.100
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'Experimenter_CREW_999_1_All_1731617801.mp4':
  Metadata:
    major_brand     : mp42
    minor_version   : 1
    compatible_brands: isommp41mp42
    creation_time   : 2025-03-04T22:42:51.000000Z
  Duration: 00:05:57.77, start: 0.000000, bitrate: 4031 kb/s
  Stream #0:0[0x1](eng): Video: hevc (Main) (hvc1 / 0x31637668), yuvj420p(pc), 3840x2160 [SAR 1:1 DAR 16:9], 3880 kb/s, 29.96 fps, 29.97 tbr, 30k tbn (default)
      Metadata:
        creation_time   : 2025-03-04T22:42:51.000000Z
        handler_name    : Core Media Video
        vendor_id       : [0][0][0][0]
        encoder         : HEVC Coding
  Stream #0:1[0x2](eng): Audio: aac (LC) (mp4a / 0x6134706D), 44100 Hz, stereo, fltp, 157 kb/s (default)
      Metadata:
        creation_time   : 2025-03-04T22:42:51.000000Z
        handler_name    : Core Media Audio
        vendor_id       : [0][0][0][0]
Stream mapping:
  Stream #0:1 -> #0:0 (aac (native) -> pcm_s16le (native))
Press [q] to stop, [?] for help
Output #0, wav, to 'audio.wav':
  Metadata:
    major_brand     : mp42
    minor_version   : 1
    compatible_brands: isommp41mp42
    ISFT            : Lavf61.7.100
  Stream #0:0(eng): Audio: pcm_s16le ([1][0][0][0] / 0x0001), 16000 Hz, mono, s16, 256 kb/s (default)
      Metadata:
        creation_time   : 2025-03-04T22:42:51.000000Z
        handler_name    : Core Media Audio
        vendor_id       : [0][0][0][0]
        encoder         : Lavc61.19.101 pcm_s16le
音频已保存为 audio.wav
[out#0/wav @ 0x14c10cc20] video:0KiB audio:11180KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.000681%
size=   11180KiB time=00:05:57.77 bitrate= 256.0kbits/s speed=1.83e+03x    
In [ ]:
print(os.path.exists("audio.wav"))  # 确保文件存在
print(os.path.getsize("audio.wav")) # 确保文件大小不是 0
ipd.Audio("audio.wav")
True
11448816
Out[ ]:
Your browser does not support the audio element.

加载语音¶

In [ ]:
# 加载音频文件
recognizer = sr.Recognizer()
audio_file = "audio.wav"

音频切片¶

In [ ]:
# 加载音频文件
audio = AudioSegment.from_wav(audio_file)
chunk_length = 5000  # 每个切片 5 秒(单位:毫秒)

# 通过列表推导式将音频分割成多个片段,每个片段的长度为 chunk_length 毫秒。
# range 函数生成一个从 0 到音频长度的序列,步长为 chunk_length。将音频文件分割成多个长度为 chunk_length 的切片,直到文件末尾。
# 根据 range 中的索引,切割音频片段。每个切片的大小为 chunk_length 毫秒
chunks = [audio[i:i + chunk_length] for i in range(0, len(audio), chunk_length)]

# 保存每个音频切片
for i, chunk in enumerate(chunks):  # 对每个音频切片进行迭代。i 是切片的索引,chunk 是对应的音频片段。
    if not os.path.exists("wav"):  # 检查输出文件夹 "wav" 是否存在
        os.makedirs("wav")  # 如果文件夹不存在,创建文件夹
    chunk.export(f"wav/chunk_{i}.wav", format="wav")  # 将当前音频切片保存为单独的 WAV 文件。文件名为 chunk_{i}.wav,其中 i 是切片的索引。例如,chunk_0.wav,chunk_1.wav
    #print(f"已保存:chunk_{i}.wav ({i + 1}/{len(chunks)})")  # 打印已保存的切片进度
    try:
        audio = AudioSegment.from_wav(audio_file)  # 重新加载原音频文件
    except Exception as e:
        print(f"加载音频文件时出错: {e}")  # 如果加载音频文件时出错,打印错误信息

音频切片音质加强¶

In [ ]:
def enhance_audio(input_audio, output_audio):
    #  加载音频文件
    audio = AudioSegment.from_wav(input_audio)
    # 增强音量
    louder_audio = audio + 10  # 将音频增加 10 分贝(dB)来增强音量, 使音频更响亮
    # 去除低频噪音,保留人声
    # 人类的语音通常在 300 Hz 到 3 kHz 之间,因此保留这个频率范围有助于提高人声的清晰度
    filtered_audio = high_pass_filter(louder_audio, cutoff=300)  # 设定高通滤波器的截止频率为 300 Hz,音频中低于 300 Hz 的频率将被滤除,去除低频噪音,如风声、低音等
    filtered_audio = low_pass_filter(filtered_audio, cutoff=3000)  # 设定低通滤波器的截止频率为 3000 Hz。音频中高于 3000 Hz 的频率将被滤除。
    # 归一化,平衡音量
    normalized_audio = normalize(filtered_audio) # 归一化音频的目的是调整音频的整体音量,使得音频的最大音量达到某个标准值(通常是 0 dB),帮助平衡音频的音量,避免过高或过低的音量
    # 保存处理后的音频
    normalized_audio.export(output_audio, format="wav")
    #print(f"处理后的音频已保存:{output_audio}")
In [ ]:
# 调用函数
# enhance_audio("wav/chunk_0.wav", "wavEnhanced/chunk_0_enhanced.wav")

for i in range(len(chunks)):  # 遍历所有切片
    chunk_file = f"wav/chunk_{i}.wav"
    enhance_audio(f"wav/chunk_{i}.wav", f"wavEnhanced/chunk_{i}_enhanced.wav")
    

文本识别whisper¶

In [ ]:
import whisper

model = whisper.load_model("base")  # 可选 "tiny", "base", "small", "medium", "large"

# 将音频文件 audio_file 转录成文本
def transcribe_whisper(audio_file):
    try:
        result = model.transcribe(audio_file) # 从返回的字典中获取识别出的文本。该文本就是音频文件中的内容。
        print(f"Whisper 识别结果: {result['text']}") # 打印出识别结果,方便调试和查看结果
        return result["text"]
    except Exception as e: # 音频文件损坏、格式不支持,或者其他问题导致模型无法正常转录
        print(f"错误: {e}")
        return None

#transcribe_whisper("wavEnhanced/chunk_0_enhanced.wav")
In [ ]:
# 将通过 Whisper 模型转录的音频切片文本数据存储到一个 CSV 文件中,并且每个切片的文本数据都包含相应的时间戳

# 初始化列表存储转录数据
# 创建空的列表 transcripts,存储每个音频切片的转录文本及其对应的时间戳。
# 每个元素将是一个包含 [timestamp, transcript] 的列表
transcripts = []  # 存储所有的转录数据

# 遍历音频切片
for i in range(len(chunks)):  # chunks 是音频切片的列表,len(chunks) 是切片的总数,i 是遍历音频切片的索引
    chunk_file = f"wavEnhanced/chunk_{i}_enhanced.wav" # 对于每一个索引 i,构造对应音频切片文件的路径。所有的音频切片已经被保存到 wavEnhanced 文件夹中,每个切片的文件名格式为 chunk_{i}_enhanced.wav。
    
    # 识别文本,  转录音频切片
    try:
        transcript = transcribe_whisper(chunk_file) # 使用之前定义的 transcribe_whisper(chunk_file) 函数来转录当前音频切片的内容,并将转录文本存储到 transcript 变量中
    
        # 计算时间戳(每个切片 5 秒)
        timestamp = i * 5  # 每个音频切片的长度为 5 秒,因此通过 i * 5 计算该切片的时间戳。timestamp 表示这个切片开始的时间点。假设第 i 个切片的起始时间为 i * 5 秒
    
        # 存储转录结果,存入列表
        transcripts.append([timestamp, transcript]) # 将每个音频切片的时间戳和对应的转录文本(timestamp 和 transcript)作为一个列表添加到 transcripts 列表中
    except Exception as e:
        #print(f"无法转录音频切片 {chunk_file}: {e}")
        transcript = ""  # 如果失败,存储空文本
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果: 
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Okay, so the
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  you're going to complete and use the key automation and the object detection system. So that means you will not need to.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  upgrade vehicle, and keep your needs off the steering wheel and meet off the pedals much better than that drive.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Okay, so when you see that some driver indicator highlight green make sure you don't hit the brake at any point during
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  drive. But this dog, the car cab look at will have the hood yes for getting visual
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  that for targets that are in the environment, the car cabboultk outshapaytich, and that screen paid them.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  and if one else turns the mission. Remember to tap the targets on that ODS as well as completing the mission.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  workstation task.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Alright, so all drivers picture fit on the brake. You're not removing the brake.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  your foot from the brake until I instruct you to remove it or cap driver you may start the car.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  I'll drive with your foot still on the brake.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  You drove through me.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  NAS pocket driver
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  1.0.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Go back. Car cab driver and only the car cab driver.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Yes. Yeah, right. Make sure everyone is in the drive.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  I am now in drive. Car cap driver and only the car cap driver.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Maybe get moving forward.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  as I drove through the
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  some driver on.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  What color skater?
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Do you really like cross- Brigade? Yes it does.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  fuck, bag
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  It's draining, out of it.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  I see scooters on both sides of the road.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  this one. Yeah, it motorcycles like an electric scooter.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Yeah, that's on the left side of the road. Passing now
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Yeah, that was a lot.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  알패 이벤튼 이벤튼
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Do you have any target? Not yet. I hit.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Okay, that's right, but it's all okay
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Those motorcycles were giving me a car.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Its kind of peel out there's bok in the sun and the roof uh pass RC it's uh
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  10 o'clock. I think we used.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  and then I saw the© all in from the tank
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  We're also passing out our one.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Why is a car
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  three so far behind car two.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  No.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果: 
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  OK, as hard as the gap.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Is the OTS working?
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Yes, sometimes there is a green butt there. This I can find out.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  No, you won't see anything, but Gapry should see something in the passenger seat of Podia.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  It should be a wide car right now. Right now. It should be a wide car right now.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  You're walking in the white catwalk to see if you can see the catwalk or the catwalk
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Stand or SUV F in the air. Yes, SUV.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  I see a camel.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  I got cars coming up.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  There's a driving white car on the left.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Yeah, it's going to be going to the left. Backpog. But it's not.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  I think that was it. We have another white...
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  SUV and passing that one left now.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  I think that was... I'm scared. I'm not scared.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  한번 해봤습니다
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Passing military checkpoint
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果: 
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  That was a truck and it was a tablet on the truck.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  covering it. It's got like model. Yeah.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Yeah
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果: 
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Jack, do you feel like this is getting...
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  you enough to understand or do you want to continue? Yeah, I think.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  I think it's, how about you comment it? I think it's enough.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  I mean I'm having fun but we can stop if you want to.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Yeah, we kill it. So, um, all dry.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  peppers or the pepper for the breaks. Okay. Come here.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  of braking now?
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  Please don't run through me. No!
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  We've been we're ended
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/whisper/transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead
  warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Whisper 识别结果:  in park now.
In [ ]:
# 创建 DataFrame
df = pd.DataFrame(transcripts, columns=["Timestamp", "Transcription"]) # 使用 pandas.DataFrame 将 transcripts 列表转换成 DataFrame。每一行包含 Timestamp(时间戳)和 Transcription(转录文本)

# 先显示前几行,确保数据正确
print(df.head())

# 保存为 CSV,命名格式:video_1731617801_transcripts.csv
csv_filename = "whisper_video_1731617801_transcripts.csv"
df.to_csv(csv_filename, index=False, encoding="utf-8") # 将转录数据保存为 CSV 文件。CSV 文件名为 whisper_video_1731617801_transcripts.csv,并且不保存索引 (index=False),使用 UTF-8 编码保存文件。

print(f" Whisper 转录数据已保存到 {csv_filename}")
   Timestamp                                      Transcription
0          0                                                   
1          5                                       Okay, so the
2         10   you're going to complete and use the key auto...
3         15   upgrade vehicle, and keep your needs off the ...
4         20   Okay, so when you see that some driver indica...
 Whisper 转录数据已保存到 whisper_video_1731617801_transcripts.csv

情感分析: TextBlob¶

In [ ]:
nltk.download("punkt")  # 需要分词
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/bocongzhao/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Out[ ]:
True
In [ ]:
# 定义情感分析函数
# TextBlob 是一个用于文本处理的 Python 库, 进行情感分析、翻译等任务
# TextBlob 提供的情感极性值。极性值的范围从 -1 到 1:
# -1 代表强烈的负面情绪
# 0 代表中性情绪
# 1 代表强烈的正面情绪
# TextBlob 的情感分析是基于词汇和语法规则的,可能不适用于复杂的上下文

def get_sentiment(text):
    if not text.strip():  # 如果文本为空,返回 'Neutral'
        return "Neutral"
    
    analysis = TextBlob(text) # 创建TextBlob 对象,其中 text 是要进行分析的字符串
    polarity = analysis.sentiment.polarity  # 情绪极性(-1 负面,0 中性,1 正面)
    
    if polarity > 0:
        return "Positive"
    elif polarity < 0:
        return "Negative"
    else:
        return "Neutral"
In [ ]:
# 在 DataFrame 里添加情感分析结果
# df["Transcription"]:DataFrame 中存储转录文本的列
# .apply(get_sentiment):对每一行的文本应用 get_sentiment 函数,将每个文本的情感分析结果计算出来,并生成一个新的列 Sentiment
# df["Sentiment"]:将情感分析的结果存储在 DataFrame 的 Sentiment 列中
df["Sentiment"] = df["Transcription"].apply(get_sentiment)

# 显示前几行,检查结果
print(df.head())

# 重新保存 CSV
csv_filename = "video_1731617801_sentiments.csv"
df.to_csv(csv_filename, index=False, encoding="utf-8") # 将 DataFrame 保存为 CSV 文件.不保存行索引; 以 UTF-8 编码保存文件。

print(f" 情感分析已完成,数据已保存到 {csv_filename}")
   Timestamp                                      Transcription Sentiment
0          0                                                      Neutral
1          5                                       Okay, so the  Positive
2         10   you're going to complete and use the key auto...  Positive
3         15   upgrade vehicle, and keep your needs off the ...  Positive
4         20   Okay, so when you see that some driver indica...  Positive
 情感分析已完成,数据已保存到 video_1731617801_sentiments.csv

数据可视化¶

In [ ]:
# 读取 CSV 数据
df = pd.read_csv("video_1731617801_sentiments.csv")

# 1️⃣ **直方图 - 每个 5 秒窗口的语音活动**
plt.figure(figsize=(10, 5))
sns.histplot(df["Timestamp"], bins=range(0, int(df["Timestamp"].max()) + 5, 5), kde=False)
plt.xlabel("Time (seconds)")
plt.ylabel("Number of speech lines")
plt.title("Voice activity per 5-second window")
plt.xticks(rotation=45)
plt.grid(True)
plt.show()

# 2️⃣ **情感分类图 - 统计 Positive / Neutral / Negative 的比例**
plt.figure(figsize=(7, 5))
sns.countplot(x="Sentiment", data=df, palette={"Positive": "green", "Neutral": "gray", "Negative": "red"})
plt.xlabel("Emotional classification")
plt.ylabel("Qty")
plt.title("Distribution of Emotional Analysis")
plt.grid(axis="y")
plt.show()
No description has been provided for this image
/var/folders/ny/3djxxhdx4kz2zxhn2yx5jgqr0000gn/T/ipykernel_41850/708763567.py:16: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(x="Sentiment", data=df, palette={"Positive": "green", "Neutral": "gray", "Negative": "red"})
No description has been provided for this image